library(tidyverse)
library(car)

## Set the working directory to the Dataverse folder before running;
## the data path below is relative to it.

# loading data
# Read the study no. 822102 extract; the first line of the file supplies the
# column names. The path is relative to the working directory.
survey <- read.table("Harris_Data/Harris 1982 Omnibus State of the Union Address Survey, study no. 822102/harris_s822102_spss.tab", header = TRUE)

# pid
# Sequential respondent id, one per row. seq_len() returns an empty vector for
# a zero-row data frame, whereas 1:nrow(survey) would return c(1, 0).
survey$pid <- seq_len(nrow(survey))

# study
# Study number, stored as a character label.
survey$study <- "822102"

# study year (year)
survey$year <- 1982

# geographic data (urban)
# No value is assigned for this study; the column is kept as all-NA.
survey[["urban"]] <- NA

# geographic data (region)
# Collapse the eight S11 codes into four regions:
# 1, 2 -> East; 3, 4 -> South; 5, 6 -> Midwest; 7, 8 -> West.
survey[["region"]] <- dplyr::recode(
  survey[["S11"]],
  `1` = "East",
  `2` = "East",
  `3` = "South",
  `4` = "South",
  `5` = "Midwest",
  `6` = "Midwest",
  `7` = "West",
  `8` = "West"
)
table(survey[["region"]])

# respondent head of household (hh)
# No value is assigned for this study; the column is kept as all-NA.
survey[["hh"]] <- NA

# increasing inequality (inequality)
# Inspect the raw Q5_2 codes, then label them; any code other than 1-3
# becomes NA.
table(survey[["Q5_2"]])
survey[["inequality"]] <- car::recode(
  survey[["Q5_2"]],
  "1 = 'Feel'; 2 = 'Don t feel'; 3 = 'Not sure'; else = NA"
)
table(survey[["inequality"]])

# inequality variable (inequality.variable)
# Constant 1 for every respondent in this study.
survey[["inequality.variable"]] <- 1

# union (union.self, union.other)
table(survey[["F4_1"]])

# F4_1 (1/0) gives the Yes/No label for union.self; rows where F4_4 == 1 are
# then overwritten with "Not Sure".
survey[["union.self"]] <- replace(
  dplyr::recode(survey[["F4_1"]], `0` = "No", `1` = "Yes"),
  survey[["F4_4"]] == 1,
  "Not Sure"
)

# Same treatment for union.other, based on F4_2.
survey[["union.other"]] <- replace(
  dplyr::recode(survey[["F4_2"]], `0` = "No", `1` = "Yes"),
  survey[["F4_4"]] == 1,
  "Not Sure"
)

table(survey[["union.self"]])
table(survey[["union.other"]])

# employment (employed)
# No value is assigned for this study; the column is kept as all-NA.
survey[["employed"]] <- NA

# empl self
survey[["employed.self"]] <- NA

# occupation
# Label the 18 F1 codes.
survey[["occupation"]] <- dplyr::recode(
  survey[["F1"]],
  `1` = "Professional",
  `2` = "Manager, official",
  `3` = "Proprietor (small business)",
  `4` = "Clerical worker",
  `5` = "Sales worker",
  `6` = "Skilled craftsman, foreman",
  `7` = "Operative, unskilled laborer (except farm)",
  `8` = "Service worker",
  `9` = "Farmer, farm manager, farm laborer",
  `10` = "Student",
  `11` = "Housewife",
  `12` = "Military service",
  `13` = "Unemployed",
  `14` = "Retired",
  `15` = "Welfare",
  `16` = "Disabled",
  `17` = "Other (SPECIFY)",
  `18` = "Not sure/refused"
)
table(survey[["occupation"]])

## occ self
survey[["occupation.self"]] <- NA

# household size (hhsize)
survey[["hhsize"]] <- NA

# education (educ)
# Inspect the raw F3 codes, then label codes 1-11. Any other code becomes a
# true missing value (NA) rather than the literal string 'NA', so that
# is.na(), table() and summary() treat it as missing, consistent with the
# other car::recode() calls in this script.
table(survey$F3)
survey$educ <- car::recode(survey$F3,
                           "1 = 'No formal schooling';
                           2 = '1-7 years completed';
                           3 = '8 years completed';
                           4 = 'Some high school';
                           5 = 'High school graduate';
                           6 = 'Some college';
                           7 = 'Two year college graduate';
                           8 = 'Four year college graduate';
                           9 = 'Post graduate';
                           10 = 'Trade/technical/vocational after high school';
                           11 = 'Not sure';
                           else = NA")
table(survey$educ)

# household income (income)
# Label the seven F10 income brackets.
survey[["income"]] <- dplyr::recode(
  survey[["F10"]],
  `1` = "$7,500 or less",
  `2` = "$7,501 to $15,000",
  `3` = "$15,001 to $25,000",
  `4` = "$25,001 to $35,000",
  `5` = "$35,001 to $50,000",
  `6` = "$50,001 or over",
  `7` = "Not sure/no answer/refused"
)
table(survey[["income"]])

# age
# Label the nine F2 age brackets.
survey[["age"]] <- dplyr::recode(
  survey[["F2"]],
  `1` = "18-20",
  `2` = "21-24",
  `3` = "25-29",
  `4` = "30-34",
  `5` = "35-39",
  `6` = "40-49",
  `7` = "50-64",
  `8` = "65 and over",
  `9` = "Refused"
)
table(survey[["age"]])


# race
# race1: labelled F11 codes.
survey[["race1"]] <- dplyr::recode(
  survey[["F11"]],
  `1` = "White",
  `2` = "Black",
  `3` = "Oriental/Asian or Pacific Islander",
  `4` = "American Indian or Alaskan native",
  `5` = "Not sure/refused"
)
# race2: labelled F12 codes (Hispanic origin).
survey[["race2"]] <- dplyr::recode(
  survey[["F12"]],
  `1` = "Yes, hispanic",
  `2` = "No, not hispanic",
  `3` = "Decline/not sure"
)
table(survey[["race1"]])
table(survey[["race2"]])
sum(is.na(survey[["race2"]]))

# Combined race variable. Conditions are tried top to bottom and the first
# match wins:
#   1. race1 missing or refused                 -> "Decline/not sure"
#   2. race1 anything other than White          -> "Non-white"
#   3. (White) race2 missing or declined        -> "Decline/not sure"
#   4. (White) race2 is "No, not hispanic"      -> "Non-Hispanic White"
#   5. (White) any remaining race2 value        -> "Hispanic White"
survey[["race"]] <- with(
  survey,
  dplyr::case_when(
    is.na(race1) | race1 == "Not sure/refused" ~ "Decline/not sure",
    race1 != "White" ~ "Non-white",
    is.na(race2) | race2 == "Decline/not sure" ~ "Decline/not sure",
    race2 == "No, not hispanic" ~ "Non-Hispanic White",
    TRUE ~ "Hispanic White"
  )
)
table(survey[["race"]])
table(survey[["race"]][survey[["race1"]] == "White"])

# politics (party)
# Label the five F6 party-identification codes.
survey[["party"]] <- dplyr::recode(
  survey[["F6"]],
  `1` = "Republican",
  `2` = "Democrat",
  `3` = "Independent",
  `4` = "Other",
  `5` = "Not sure"
)
table(survey[["party"]])

# politics (ideology)
# Label the four F9 ideology codes.
survey[["ideology"]] <- dplyr::recode(
  survey[["F9"]],
  `1` = "Conservative",
  `2` = "Middle-of-the-road",
  `3` = "Liberal",
  `4` = "Not sure"
)
table(survey[["ideology"]])

# gender
# Inspect the raw S1 codes, then label them.
table(survey[["S1"]])
survey[["gender"]] <- dplyr::recode(
  survey[["S1"]],
  `1` = "Male",
  `2` = "Female"
)
table(survey[["gender"]])


# religion
# Label the six F5 codes.
survey[["religion"]] <- dplyr::recode(
  survey[["F5"]],
  `1` = "Protestant",
  `2` = "Catholic",
  `3` = "Jewish",
  `4` = "Other (SPECIFY)",
  `5` = "None",
  `6` = "Not sure/no answer/refused"
)
table(survey[["religion"]])

# factuals
# factual1 comes from Q4A; factual2 and factual3 are not assigned any value
# in this study and are kept as all-NA columns.
table(survey[["Q4A"]])
survey[["factual1"]] <- dplyr::recode(
  survey[["Q4A"]],
  `1` = "Saw or heard",
  `2` = "Did not see or hear",
  `3` = "Read about but didn't see or hear",
  `4` = "Not sure"
)
table(survey[["factual1"]])
survey[["factual2"]] <- NA
survey[["factual3"]] <- NA

## alienation
# Q5_1, Q5_3 and Q5_4 share one coding scheme, so the same recode spec is
# applied to each and stored as dontcare, dontcount and leftout respectively.
# Any code other than 1-3 becomes NA.
table(survey[["Q5_1"]])
survey[c("dontcare", "dontcount", "leftout")] <- lapply(
  survey[c("Q5_1", "Q5_3", "Q5_4")],
  car::recode,
  recodes = "1 = 'Feel'; 2 = 'Don t feel'; 3 = 'Not sure'; else = NA"
)

## question placement
# Constant label for every respondent in this study.
survey[["question_place"]] <- "before party"


# subset
# Keep only the harmonised columns, in this fixed order.
survey_822102 <- survey[c(
  "pid", "study", "year",
  "urban", "region", "hh",
  "inequality", "inequality.variable",
  "union.self", "union.other",
  "employed", "employed.self",
  "occupation", "occupation.self",
  "hhsize", "educ", "income",
  "age", "race", "party", "ideology", "gender", "religion",
  "factual1", "factual2", "factual3",
  "dontcare", "dontcount", "leftout",
  "question_place"
)]
summary(survey_822102)


# save file
# Writing the result to disk is currently disabled.
#saveRDS(survey_822102, file = "Harris_Data/survey_822102.rds")
